	
capture cd D:\Dropbox\book_welfare\replication

****************************************************************
* Nfcap: the number of dfemale==1 titles per publication year that would keep
* the ratio of dfemale==1 to dfemale==0 titles at its 1960-1969 average.
* Output: data\Nfcap_bs.dta, one row per pyear with the variable Nfcap.
use data\bookstat_2021_welfare_asin.dta, clear
	* titles observed in 2021, published 1960 through 2021
	keep if year==2021 & q>0 & inrange(pyear, 1960, 2021)

	* dfemale is 1 when mshare is below one half, missing when mshare is missing
	generate dfemale = (mshare < 0.5) if mshare != .
	drop if dfemale == .

	* title counts (n) and total q by publication year and dfemale, then one
	* row per year with columns n0 n1 q0 q1
	collapse (sum) q (count) n=q, by(pyear dfemale)
	reshape wide q n, i(pyear) j(dfemale)

	generate nratio = n1/n0
	twoway line nratio pyear

	* average ratio over publication years 1960-1969, applied to every year
	summarize nratio if pyear>=1960 & pyear<1970
	local ratio60s = r(mean)
	generate Nfcap = `ratio60s'*n0

	keep pyear Nfcap

save data\Nfcap_bs.dta, replace


*******************************************************************************
* Build data\Xbs.dta, the title-level file that each pass of the prediction
* regressions below starts from.
use data\bookstat_2021_welfare_asin.dta, clear
	keep if year==2021 & q>0 & inrange(pyear, 1960, 2021)
	generate dfemale = (mshare < 0.5) if mshare != .

	* attach the per-year cap Nfcap
	merge m:1 pyear using "data\Nfcap_bs.dta"

	* titles with missing mshare are coded dfemale = 0 from here on
	replace dfemale = 0 if dfemale == .
	keep if q > 0

	* A: per author, the total q of titles with pyear before the sales year;
	* authors with no such title get A = 0 and dmiss = 1
	egen xA = total(q) if pyear < year, by(author)
	egen A = max(xA), by(author)
	mvencode A, mv(0)
	generate lA = ln(A + 1)
	generate dmiss = (A == 0)

	* shares out of a fixed market size M; s0 is the remainder
	egen Q = total(q)
	generate M = 10e+08
	generate s = q/M
	generate s0 = (M - Q)/M

	generate fbook = dfemale
	rename pyear pubyr

	* gno is not in the keep list below, so it is not saved
	egen gno = group(genre)

	* NOTE(review): no variable named p is kept here, yet later steps compute
	* p*q0.  With variable abbreviation on, p would resolve to pubyr - confirm
	* whether a price variable was meant to be kept.
	keep s s0 q Q fbook pubyr Nfcap genre lA dmiss dfemale
save data\Xbs.dta, replace
*********************************************




*********************************************
*********************************************
* Start the results collector: a one-row placeholder file (v1 missing) that
* each pass of the sigma loop below appends its summary row to.
clear
set obs 1
generate v1 = .
save data\collect_bs.dta, replace
*********************************************
*********************************************


 * Simulation with predicted quality and a male entry response, run once for
 * each value of sigma.  Each pass:
 *   1. computes delta from s, s0 and q/Q and fits the prediction regression;
 *   2. ranks titles on predicted delta (dh) within fbook and pubyr, and flags
 *      fbook==1 titles ranked beyond Nfcap for removal;
 *   3. stores status quo CS and revenue totals in locals;
 *   4. simulates further fbook==0 titles by extending linear fits of dh and
 *      of the log absolute residual past the observed ranks;
 *   5. removes the flagged titles, admits titles up to rank nstar, and
 *      appends one summary row to data\collect_bs.dta.
 foreach sigma in  .25 .373  .5 .75  {
	use data\Xbs.dta, clear 
	egen gno =group(genre)
	gen sigma = `sigma'

	gen double delta = log(s) - log(s0) - sigma*log(q/Q)

	* run prediction regression
	reg delta  lA i.gno dmiss fbook 
	predict dh 
	gen res=delta-dh 
	gen res_sq = res^2 

	* twoway (mband res_sq dh ) 

	* sighat is the absolute residual, lsighat its log
	gen sighat = res_sq^.5 
	gen lsighat = ln(sighat)

	* count is the rank on predicted delta, best first
	gsort -dh 
	gen count=_n 

	*characterize the distribution of deltas near status quo entry 
	* linear fits in count over the bottom tenth of the dh ranking, fbook==0 only
	regress dh count if _n/_N>.9 & fbook==0
	estimates save clean_room\dh, replace 
	regress lsighat count if _n/_N>.9 & fbook==0
	estimates save clean_room\sighat, replace 

	* create gender-specific rank 	
	gsort fbook pubyr  -dh 
	bysort fbook pubyr: gen rank=_n 

	* choose which female books to eliminate
	gen drop = (fbook==1 & rank > Nfcap)
	gsort -dh 

	* initial cs overall
	* NOTE(review): M is 10.00e+07 here while s and s0 in Xbs.dta were built
	* with M = 10e+08 - confirm the two market sizes are meant to differ.
	* NOTE(review): Xbs.dta as saved holds no variable named p; with variable
	* abbreviation on, p in the revenue lines resolves to pubyr - confirm.
	preserve 
		gen double M = 10.00e+07
		gen expdelta = exp(delta/(1-sigma))
		egen D=sum(expdelta) 
		gen q0 = M*(expdelta/D)*(D^(1-sigma))/(1+D^(1-sigma))
		gen double CS= log(1 + D^(1-sigma))
		su CS
		local cs0=r(mean)
		* this value of old is replaced further down before it is used
		su q0 if _n/_N>.95 
		local old = r(mean)
		gen s_in = [D^(1-sigma)]/[1+D^(1-sigma)]
		gen rev=p*q0
		egen REV = sum(rev)
		gen revf=p*q0*dfemale
		egen REVF = sum(revf)
		gen revm=p*q0*(1-dfemale)
		egen REVM = sum(revm)

		su REV
		local rev0=r(mean) 
		su REVF
		local rev0f=r(mean) 
		su REVM
		local rev0m=r(mean) 
	restore 

	* the same status quo variables again, this time kept in memory so that
	* they are carried into Cbs.dta
	gen double M = 10.00e+07

	gen expdelta = exp(delta/(1-sigma))
	egen D=sum(expdelta) 
	gen q0 = M*(expdelta/D)*(D^(1-sigma))/(1+D^(1-sigma))

	gen rev=p*q0
	egen REV = sum(rev)
	gen revf=p*q0*dfemale
	egen REVF = sum(revf)
	gen revm=p*q0*(1-dfemale)
	egen REVM = sum(revm)

	*create a local variable (N) showing the status quo number of male-authored books in the choice set 
	su count if fbook==0 
	local N = r(N)

	* create a dataset called Abs.dta that contains simulated additional male-authored books 
	preserve 
		clear 
		set obs 2000000
		gen count=_n 

		* a and b: intercept and slope of the dh fit; c and d: same for lsighat
		estimates use clean_room\dh
		local a = _b[_cons]
		local b = _b[count]
		estimates use clean_room\sighat 
		local c = _b[_cons]
		local d = _b[count]

		gen xdelta = `a' + `b'*count 
		set seed 99 
		drawnorm xepsilon 

		* scale the standard normal draw by the fitted absolute residual
		gen epsilon = (exp(`c' + `d'*count))*xepsilon 

		gen delta =xdelta + epsilon 

		* keep only rows whose count exceeds the number of fbook==0 titles
		keep if _n>`N'

		keep delta count 
		gen fbook=0 

		save clean_room\Abs.dta, replace
	restore 

	* T and TT hold the number of existing titles before the simulated ones
	local T=_N
	gen TT = _N 
	* append the simulated titles from the file written just above; the path
	* must match the save inside the preserve block
	append using clean_room\Abs.dta
	save data\Cbs.dta, replace 

	***********  Start with all existing plus hypothetical additional male products
	* First, calculate the average quantity for marginal entering products with status quo entry (old)
	use data\Cbs.dta, clear 
	local T = TT 
	replace sigma=`sigma' if sigma==. 

	drop count 
	gen count=_n 

	drop M expdelta D q0 

	* D is a running sum here, so q0 and CS in row n treat rows 1 to n as the
	* choice set
	gen double M = 10.00e+07
	gen expdelta = exp(delta/(1-sigma))
	gen D=sum(expdelta) 
	gen q0 = M*(expdelta/D)*(D^(1-sigma))/(1+D^(1-sigma))
	gen double CS= log(1 + D^(1-sigma))

	* old: mean q0 over the rows just below row T, the last existing title
	su q0
	su q0 if _n>`T'-5000 & _n<`T' 
	local old = r(mean)
	di `old'

	keep count q0  CS

	*********** without women: remove female entry, then keep products until the quantity falls to
	use data\Cbs.dta, clear
	* simulated rows have drop missing, so only flagged existing titles go
	drop if drop==1 

	replace sigma=`sigma' if sigma==. 

	drop count 
	gen count=_n 

	drop M expdelta D q0 
	gen double M = 10.00e+07
	gen expdelta = exp(delta/(1-sigma))
	gen D=sum(expdelta) 
	gen q0 = M*(expdelta/D)*(D^(1-sigma))/(1+D^(1-sigma))

	* nstar: the count at which a linear fit of q0 on count, estimated on
	* count above 500000, equals old
	preserve 
		reg q0 count if count>500000
		gen nstar =  (`old' - _b[_cons])/_b[count]
		su nstar
		local nstar=r(mean)
		di `nstar'
	restore 

	* csw: CS in row int(nstar), computed from the running sum D
	gen double CS= log(1 + D^(1-sigma))
	su CS if _n==int(`nstar')
	local csw = r(mean)

	drop if _n>int(`nstar')

	* D becomes the total over the retained rows
	drop D 
	egen D=sum(expdelta) 

	gen s_in1 = [D^(1-sigma)]/[1+D^(1-sigma)]
	gen s1=[expdelta/(D)]*s_in1
	gen rev1=p*M*s1
	egen REV1 = sum(rev1)
	gen revf1=p*M*s1*dfemale
	egen REVF1 = sum(revf1)
	gen revm1=p*M*s1*(1-dfemale)
	egen REVM1 = sum(revm1)
	gen qq1 = s1*M
	gen qqf1 = s1*dfemale*M 
	gen qqm1 = s1*(1 - dfemale)*M 
	egen QQ1 = sum(qq1)
	egen QQF1 = sum(qqf1)
	egen QQM1 = sum(qqm1)

	keep count q0 CS* sigma REV*

	drop CS 

	* status quo values stored in locals earlier in this pass
	gen CS=`cs0'
	gen CSW = `csw'

	gen REV0 = `rev0'
	gen REVm = `rev0m'
	gen REVf = `rev0f'

	* one summary row per sigma, stacked on top of the rows collected so far
	keep CS  CSW  sigma REV*
	keep if _n==1 
	append using data\collect_bs.dta
	save data\collect_bs.dta, replace 

 }
	
		
		
	* Results table: stack the rows collected above with clean_room\collect.dta
	* and express each status quo value as a percent difference from its
	* counterfactual counterpart.
	use data\collect_bs.dta, clear
		generate dataset = "Bookstat, 2021"
		append using clean_room\collect.dta
		* appended rows arrive with dataset empty
		replace dataset = "Goodreads, 2016" if dataset == ""

		* consumer surplus
		* NOTE(review): CSf, CSWf, CSm and CSWm are not created in this file;
		* presumably they come from clean_room\collect.dta - confirm.
		generate csratio = 100*(CS/CSW - 1)
		generate csfratio = 100*(CSf/CSWf - 1)
		generate csmratio = 100*(CSm/CSWm - 1)

		* revenue
		generate revratio = 100*(REV0/REV1 - 1)
		generate revfratio = 100*(REVf/REVF1 - 1)
		generate revmratio = 100*(REVm/REVM1 - 1)

		format rev* cs* %9.2f
		duplicates drop
		gsort dataset sigma
		* rows with sigma missing (the placeholder row) are left out of the view
		browse dataset sigma cs* rev* if sigma != .

**************************************************
* no predictability, no endogenous male entry response	
**************************************************

capture cd D:\Dropbox\book_welfare\replication

* Recompute Nfcap, the per-year number of dfemale==1 titles that keeps the
* ratio of dfemale==1 to dfemale==0 titles at its 1960-1969 average, and save
* it to data\Nfcap_bs.dta, the file merged in when Xbs.dta is rebuilt below.
use  data\bookstat_2021_welfare_asin.dta, clear  
		keep if year==2021 & q>0 
		keep if pyear>=1960 & pyear<=2021 
		
		* dfemale is 1 when mshare is below one half, missing when mshare is missing
		gen dfemale = mshare<0.5 if mshare~=. 
		
		* title counts (n) and total q by publication year and dfemale
		collapse (sum) q (count) n=q, by(pyear dfemale)
		keep if dfemale~=.
		reshape wide q n , i(pyear) j(dfemale)
		gen nratio = n1/n0 
		twoway (line nratio pyear)
		* average ratio over publication years 1960-1969, applied to every year
		su nratio if pyear>=1960 & pyear<1970 
		local a=r(mean)
		gen Nfcap = `a'*n0

		keep pyear Nfcap  

* Without this save the result would be discarded by the next use command and
* the merge below would depend on a file left by an earlier run.
save data\Nfcap_bs.dta, replace 

	
* Rebuild data\Xbs.dta, the title-level input for the random-removal runs below.
use data\bookstat_2021_welfare_asin.dta, clear
	keep if year==2021 & q>0 & inrange(pyear, 1960, 2021)
	generate dfemale = (mshare < 0.5) if mshare != .

	* attach the per-year cap Nfcap
	merge m:1 pyear using "data\Nfcap_bs.dta"

	* titles with missing mshare are coded dfemale = 0 from here on
	replace dfemale = 0 if dfemale == .
	keep if q > 0

	* A: per author, the total q of titles with pyear before the sales year;
	* authors with no such title get A = 0 and dmiss = 1
	egen xA = total(q) if pyear < year, by(author)
	egen A = max(xA), by(author)
	mvencode A, mv(0)
	generate lA = ln(A + 1)
	generate dmiss = (A == 0)

	* shares out of a fixed market size M; s0 is the remainder
	egen Q = total(q)
	generate M = 10e+08
	generate s = q/M
	generate s0 = (M - Q)/M

	generate fbook = dfemale
	rename pyear pubyr

	* gno is not in the keep list below, so it is not saved
	egen gno = group(genre)

	* NOTE(review): no variable named p is kept here, yet later steps compute
	* p*q0.  With variable abbreviation on, p would resolve to pubyr - confirm
	* whether a price variable was meant to be kept.
	keep s s0 q Q fbook pubyr Nfcap genre lA dmiss dfemale
save data\Xbs.dta, replace
 *********************************************

*********************************************
*********************************************
* Start the collector for the random-removal runs: a one-row placeholder file
* (v1 missing) that each regime and sigma appends a row to.
clear
set obs 1
generate v1 = .
save data\collect_bs_upper.dta, replace
*********************************************
*********************************************


	* Runs without predictability or a male entry response: the fbook==1 titles
	* to remove are picked by a random draw instead of predicted delta, and no
	* simulated titles are added.  For each sigma, one row per regime is
	* appended to data\collect_bs_upper.dta, status quo first.
	foreach sigma in  .25 .373  .5 .75  {

		use data\Xbs.dta, clear
		egen gno = group(genre)
		generate sigma = `sigma'
		generate double delta = log(s) - log(s0) - sigma*log(q/Q)

		* 86 as seed for Maxwell Smart
		* the seed is reset on every pass, so each sigma uses the same draw
		set seed 86
		drawnorm x

		* random rank within fbook and pubyr; fbook==1 titles beyond Nfcap are flagged
		gsort fbook pubyr -x
		bysort fbook pubyr: generate rank = _n
		generate drop = (fbook==1 & rank > Nfcap)
		gsort -x

		* pass 0 is the status quo, pass 1 removes the flagged titles first
		forvalues pass = 0/1 {
			if `pass' == 0 {
				local regime_name "status quo"
			}
			else {
				local regime_name "no influx"
			}

			preserve
				if `pass' == 1 {
					drop if drop==1
				}

				* CS and quantities for the titles in memory
				generate double M = 10.00e+07
				generate expdelta = exp(delta/(1-sigma))
				egen D = total(expdelta)
				generate q0 = M*(expdelta/D)*(D^(1-sigma))/(1+D^(1-sigma))
				generate double CS = log(1 + D^(1-sigma))

				* revenue totals: all titles, dfemale==1 titles, dfemale==0 titles
				generate rev = p*q0
				egen REV = total(rev)
				generate revf = p*q0*dfemale
				egen REVF = total(revf)
				generate revm = p*q0*(1-dfemale)
				egen REVM = total(revm)

				summarize CS REV REVF REVM

				* reduce to a single row and put it on top of the collected rows
				collapse (mean) CS REV REVF REVM sigma
				generate regime = "`regime_name'"
				append using data\collect_bs_upper.dta
				save data\collect_bs_upper.dta, replace
			restore
		}
	}
		* Percent difference of the status quo from the no influx regime, by sigma.
		use data\collect_bs_upper.dta, clear

		* Sorting regime in descending order puts the status quo row directly
		* above the no influx row within each sigma, so [_n+1] on a status quo
		* row refers to the no influx row for the same sigma.
		gsort sigma -regime
		generate cs_ch = 100*(CS/CS[_n+1] - 1)
		generate rev_ch = 100*(REV/REV[_n+1] - 1)
		generate revf_ch = 100*(REVF/REVF[_n+1] - 1)
		generate revm_ch = 100*(REVM/REVM[_n+1] - 1)

		format rev* cs* %9.2f

		browse sigma *_ch if regime == "status quo"
		
		
		